/******************************************************************************
*
* Copyright (C) 2009, Gentee, Inc. All rights reserved. 
* This file is part of Perfect Automation software 
* http://www.perfectautomation.com
* 
* THIS FILE IS PROVIDED UNDER THE TERMS OF THE PERFECT AUTOMATION LICENSE 
* ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THIS FILE 
* CONSTITUTES RECIPIENTS ACCEPTANCE OF THE AGREEMENT.
*
* Author: Alexey Krivonogov ( gentee )
*
******************************************************************************/

// Record describing one scanned entry. The same layout is used for the
// elements of both the dirs and the files arrays.
type  finf
{
   // Entry name. For the root directory this is the whole folder path passed
   // to scaninit. compare() clears the name of a file once it has been
   // reported as a duplicate.
   str   name
   // File size in bytes (taken from the low 32 bits reported by the finder).
   // Not assigned for directories.
   uint  size
   // Index in dirs of the containing directory; 0 means "no parent".
   uint  owner
}

// Shared state of the duplicate finder.
global
{
   // All directories found so far. init() adds one element up front, so the
   // directories created by newdir start at index 1.
   arr dirs  of finf
   // All files found so far.
   arr files of finf
   // Indexes into files; sortfiles() fills it and orders it by file size.
   arr sizes of uint
   // output accumulates the report text; nline is the line separator that
   // search() selects for the report format.
   str output nline
}

// Appends a directory record to dirs.
//   name  - the directory name to store
//   owner - index in dirs of the parent directory (0 for none)
// Returns the value given by dirs.expand, which is used as the index of the
// new record.
func uint newdir( str name, uint owner )
{
   uint idx = dirs.expand( 1 )

   dirs[ idx ].owner = owner
   dirs[ idx ].name = name
   return idx
}

// Appends a file record to files.
//   name  - the file name to store
//   size  - the file size in bytes
//   owner - index in dirs of the directory that contains the file
// Returns the value given by files.expand, which is used as the index of the
// new record.
func uint newfile( str name, uint size owner )
{
   uint idx = files.expand( 1 )

   files[ idx ].owner = owner
   files[ idx ].size = size
   files[ idx ].name = name
   return idx
}

// Recursively registers everything that matches wildcard.
//   wildcard - search mask, e.g. a folder path followed by "\*.*"
//   owner    - index in dirs of the directory being scanned
// Each subdirectory is added with newdir and scanned in turn; each file is
// added with newfile.
func scanfolder( str wildcard, uint owner )
{
   ffind finder
   uint  subdir

   finder.init( wildcard, $FIND_FILE | $FIND_DIR )
   foreach entry, finder
   {
      if entry.attrib & $FILE_ATTRIBUTE_DIRECTORY
      {
         subdir = newdir( entry.name, owner )
         scanfolder( entry.fullname + "\\*.*", subdir )
      }
      elif !entry.sizehi
      {
         // Files with a non-zero high size word are skipped: only the low
         // 32 bits of the size are stored in the record.
         newfile( entry.name, entry.sizelo, owner )
      }
   }
}

// Starts a scan of folder and everything below it.
//   folder - the root folder; fdelslash() is applied to it in place
// The root is registered with owner 0, then its contents are scanned.
func scaninit( str folder )
{
   str wildcard

   folder.fdelslash()

   // Build the search mask "<folder>/*.*" in a separate string so that
   // folder itself stays untouched for newdir.
   wildcard = folder
   wildcard.faddname( "*.*" )

   scanfolder( wildcard, newdir( folder, 0 ))
}

// Comparison callback passed to sizes.sort(): orders two elements of sizes by
// the size of the files they refer to.
//   left, right - dereferenced with ->uint to obtain indexes into files
//                 (presumably the addresses of the two array elements being
//                 compared; confirm against the arr.sort documentation)
// Returns -1, 0 or 1 when the left file is smaller than, equal to or larger
// than the right file.
func int sortsize( uint left right )
{
   uint lsize = files[ left->uint ].size
   uint rsize = files[ right->uint ].size

   // The sizes are unsigned 32-bit values, so they are compared directly.
   // Returning int( lsize ) - int( rsize ) gives the wrong sign as soon as a
   // size or the difference reaches 2^31, which would break the ordering that
   // compare() relies on for files of 2 GB and above.
   if lsize < rsize : return -1
   if lsize > rsize : return 1
   return 0
}

// Builds the sizes index: one entry per element of files, ordered with the
// sortsize callback.
func sortfiles
{
   uint idx

   outtext( "Sorting..." )

   // Start from the identity mapping sizes[ n ] = n ...
   sizes.expand( *files )
   fornum idx = 0, *sizes
   {
      sizes[ idx ] = idx
   }
   // ... and let the sort reorder the indexes.
   sizes.sort( &sortsize )
}

// Appends the path of directory id to ret, outermost directory first.
//   id  - index in dirs
//   ret - string that receives the path components via faddname
// Returns the result of the final faddname call on ret.
func str getdir( uint id, str ret )
{
   uint parent = dirs[ id ].owner

   // An owner of 0 ends the chain; otherwise the parent's path goes first.
   if parent
   {
      getdir( parent, ret )
   }
   return ret.faddname( dirs[ id ].name )
}

// Builds the path of file id in ret.
//   id  - index in files
//   ret - string that is cleared and then filled with the directory path
//         followed by the file name
// Returns the result of the final faddname call on ret.
func str getfile( uint id, str ret )
{
   uint folder = files[ id ].owner

   ret.clear()
   getdir( folder, ret )
   return ret.faddname( files[ id ].name )
}

// Walks the size-ordered index and reports groups of identical files.
//   minsize - files smaller than this many bytes are not used as the
//             reference file of a group
// sortfiles() must have filled sizes beforehand. For every reference file the
// following entries of the same size are compared with isequalfiles; matches
// are appended to output and their names are cleared so that they are not
// reported again.
func compare( uint minsize )
{
   uint i id next j found count last
   str  idname nextname 
   
   outtext( "Looking for duplicates..." )
   output @ "Minimum size of files: \(minsize) bytes\(nline)"
   
   // The last entry has no successor to compare with, so the loop stops one
   // short. *sizes is unsigned: without this guard an empty index would make
   // *sizes - 1 wrap around to a huge bound.
   if *sizes : last = *sizes - 1

   fornum i = 0, last
   {
      id = sizes[ i ]

      // Skip files already reported as duplicates (empty name) and files
      // below the size threshold.
      if !*files[ id ].name || files[ id ].size < minsize : continue

      found = 0            
      j = i + 1
      next = sizes[ j ]
      
      // Equal sizes are adjacent in the index, so scan forward while the
      // size stays the same.
      while files[ id ].size == files[ next ].size
      {
         if *files[ next ].name &&
             isequalfiles( getfile( id, idname ), getfile( next, nextname ))
         {
            if !found
            {
               // First match for this reference file: write the group header.
               output @ "\(nline)Size: \(files[ id ].size) ========\(nline)\( idname )\(nline)" 
            }
            count++
            ( output @ nextname ) @"\(nline)"
            
            found = 1
            // Mark the duplicate as handled.
            files[ next ].name.clear()
         }
         if ++j == *sizes : break
         next = sizes[ j ]
      }
      // Refresh the progress text every 64 files.
      if i && !( i & 0x3F ) 
      {
         outtextset( "Approved files: \(i) Found the same files: \(count)" )
      }
   }   
   // Final status: show the total number of indexed files (the loop counter
   // stops one short of it), matching the total written to the report.
   outtextset( "Approved files: \(*sizes) Found the same files: \(count)" )
   output @ "\(nline)=================\(nline)Approved files: \(*files) Found the same files: \(count)\(nline)"
}

// Prepares the global storage before a scan.
func init
{
   // Directories: reserve room and add one element up front, so the
   // directories created by newdir start at index 1 and an owner of 0 can
   // mean "no parent".
   dirs.reserve( 1000 )
   dirs.expand( 1 )

   // Files and the report text: reserve room only.
   files.reserve( 20000 )
   output.reserve( 1000000 )
}

// Produces the duplicate report for the files collected so far and opens it.
//   outfile - name of the report file to write
//   ext     - report format; "html" (compared with %==) selects HTML output,
//             anything else plain text
//   minsize - minimum file size passed on to compare()
// Does nothing when no files have been collected.
func search( str outfile ext, uint minsize )
{
   uint html

   if !*files : return

   // Decide the format once; it selects the line separator and the
   // HTML prologue/epilogue.
   html = ext %== "html"
   nline = ?( html, "<br>", "\l" )
   if html
   {
      output = "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">
<HTML><HEAD><TITLE>Same files</TITLE></HEAD>
<BODY BGCOLOR=#FFFFFF>"
   }

   sortfiles()
   compare( minsize )

   if html
   {
      output += "</BODY></HTML>"
   }

   // Save the report and hand it to the shell.
   output.write( outfile )
   shell( outfile )
}

